library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0     v readr   1.3.1
## v tibble  1.4.2     v purrr   0.2.5
## v tidyr   0.8.2     v stringr 1.3.1
## v ggplot2 3.1.0     v forcats 0.4.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(cluster)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
# Read the domestic T20 career bowling rating CSV into a data frame.
# NOTE(review): the path is an absolute location on one machine's D: drive;
# the script will only run where that file exists.
domestict20Bowling <- read.csv(
  file = "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Ratings\\domestict20careerbowlingrating_mod.csv"
)

# Show the first six rows as a quick check that the columns parsed as expected.
head(domestict20Bowling, n = 6L)
##            Name Balls Maidens Runs Wickets Average X4_Wicket_Hauls
## 1   S L Malinga  4835      13 5393     299   18.03               8
## 2    S P Narine  4112      15 3856     227   16.98              10
## 3   D J J Bravo  5781       2 7772     317   24.51               7
## 4 Shahid Afridi  4650       9 5177     236   21.93               4
## 5    A C Thomas  4558      15 5739     263   21.82               4
## 6    D P Nannes  4624       9 5719     257   22.25               7
##   X5_Wicket_Hauls Strike_Rate Economy    Rating ScaledRating LogRating
## 1               4       16.17    6.69 11985.215        20.93  4.078646
## 2               1       18.11    5.62  9781.489        19.89  3.990405
## 3               1       18.23    8.06  9276.494        19.63  3.967384
## 4               2       19.70    6.68  7491.174        18.61  3.874550
## 5               1       17.33    7.55  7276.598        18.47  3.861928
## 6               2       17.99    7.42  7198.086        18.42  3.857217
##   Best_Bowling
## 1       06-Jul
## 2       May-19
## 3       May-23
## 4       05-Jul
## 5       May-24
## 6       May-31
# Per-column summary (quartiles for numeric columns, level counts for factors).
summary(object = domestict20Bowling)
##                Name         Balls         Maidens           Runs       
##  S Sharma        :  2   Min.   :   1   Min.   : 0.00   Min.   :   0.0  
##  Yuvraj Singh    :  2   1st Qu.: 202   1st Qu.: 0.00   1st Qu.: 252.5  
##  A A Chavan      :  1   Median : 689   Median : 1.00   Median : 874.0  
##  A A Jhunjhunwala:  1   Mean   :1094   Mean   : 2.03   Mean   :1353.2  
##  A A Kazi        :  1   3rd Qu.:1636   3rd Qu.: 3.00   3rd Qu.:2008.5  
##  A A Noffke      :  1   Max.   :5781   Max.   :19.00   Max.   :7772.0  
##  (Other)         :387                                                  
##     Wickets          Average       X4_Wicket_Hauls   X5_Wicket_Hauls 
##  Min.   :  0.00   Min.   :  0.00   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:  9.00   1st Qu.: 21.61   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median : 36.00   Median : 24.60   Median : 0.0000   Median :0.0000  
##  Mean   : 55.12   Mean   : 25.42   Mean   : 0.9392   Mean   :0.2329  
##  3rd Qu.: 81.50   3rd Qu.: 29.23   3rd Qu.: 1.0000   3rd Qu.:0.0000  
##  Max.   :317.00   Max.   :162.00   Max.   :10.0000   Max.   :4.0000  
##                                                                      
##   Strike_Rate        Economy           Rating           ScaledRating   
##  Min.   :  0.00   Min.   : 0.000   Min.   :    0.000   Min.   : 0.000  
##  1st Qu.: 17.02   1st Qu.: 7.175   1st Qu.:    8.067   1st Qu.: 3.370  
##  Median : 19.75   Median : 7.660   Median :  140.498   Median : 6.890  
##  Mean   : 19.78   Mean   : 7.849   Mean   :  740.900   Mean   : 7.173  
##  3rd Qu.: 22.59   3rd Qu.: 8.170   3rd Qu.:  709.812   3rd Qu.:10.325  
##  Max.   :114.00   Max.   :24.000   Max.   :11985.215   Max.   :20.930  
##                                                                        
##    LogRating        Best_Bowling
##  Min.   :-1.3802   Mar-18 : 10  
##  1st Qu.: 0.9064   Apr-15 :  9  
##  Median : 2.1477   Mar-13 :  9  
##  Mean   : 1.8447   Apr-13 :  8  
##  3rd Qu.: 2.8511   Apr-14 :  8  
##  Max.   : 4.0786   04-Oct :  7  
##                    (Other):344
# Cluster bowlers on their numeric career statistics with k-means (k = 5) and
# plot the resulting clusters on Economy vs Wickets.

# Fixed seed so the random k-means initialisation is repeatable between runs.
set.seed(20)

# The twelve numeric columns that were previously selected by position (2:13).
# Naming them keeps the selection correct if the CSV column order changes.
cluster_features <- c(
  "Balls", "Maidens", "Runs", "Wickets", "Average",
  "X4_Wicket_Hauls", "X5_Wicket_Hauls", "Strike_Rate", "Economy",
  "Rating", "ScaledRating", "LogRating"
)

# Standardise each feature before clustering: k-means works on Euclidean
# distance, and the summary output shows Rating reaching ~11985 while Economy
# tops out at 24, so on raw values the large-scale columns would decide the
# clusters almost on their own.
scaled_features <- scale(domestict20Bowling[, cluster_features])

# nstart = 25 runs 25 random initialisations and keeps the best one, which
# makes the solution far less sensitive to a single unlucky start.
domesticBowlCluster <- kmeans(scaled_features, centers = 5, nstart = 25)

# Keep the assignment as a factor so plotting code treats cluster ids as
# categories rather than as a continuous number.
domesticBowlCluster$cluster <- as.factor(domesticBowlCluster$cluster)

# Attach the cluster as a real column instead of using `$` inside aes().
# The previous three hard-coded labels ("Bad"/"Good"/"Best") are dropped: there
# are five clusters, and k-means cluster ids carry no quality ordering, so the
# legend now shows the plain cluster id.
domestict20Bowling %>%
  mutate(Cluster = domesticBowlCluster$cluster) %>%
  ggplot(aes(Economy, Wickets, color = Cluster)) +
  geom_point(size = 2) +
  scale_color_hue(name = "Cluster") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Domestic T20 Bowling: Economy vs Wickets")

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive version of the Economy vs Wickets scatter, coloured by k-means
# cluster, with each player's name shown on hover.
p <- plot_ly(
  domestict20Bowling,
  x = ~Economy,
  y = ~Wickets,
  type = "scatter",
  mode = "markers",
  color = domesticBowlCluster$cluster,
  # paste0 avoids the doubled space that paste('Name: ', Name) produced.
  text = ~paste0("Name: ", Name)
) %>%
  # Title corrected: the data are bowlers and the axes are Economy and Wickets,
  # not batting averages.
  layout(title = "Domestic T20 Bowling Clusters: Economy vs Wickets")

# Print the widget so it renders.
p